#Heterodontosaurus breathing paper August 2020 analysis

#require some packages that allow you to do various stuff
library(ape) #phylogenetic analysis
library(tidyverse) #data manipulation
library(ggplot2) #visualization
library(GGally) #visualization
library(ggfortify) #visualization
library(phytools) #phylogenetically corrected PCA
library(strap) #time-calibrate phylogeny
library(geiger) #modeling continuous characters on the tree
library(nlme) #phylogenetic correction
library(maps)
library(ggpubr)
library(dplyr)

#read data
setwd("C:/")
Hip.raw<-read.csv("C:~/rawpelvic2020.csv", header=TRUE, row.names=1)
head(Hip.raw)
str(Hip.raw)

#log measurements
Hip.log<-Hip.raw
Hip.log$logaleng<-log10(Hip.log$appl)
Hip.log$logpubl<-log10(Hip.log$publ)
Hip.log$logiscl<-log10(Hip.log$iscl)
Hip.log$logfeml<-log10(Hip.log$feml)


#check new data frame
str(Hip.log)
Hip.log

##investigate relationships between variables
ggpairs(Hip.log, columns=5:8)


#load your tree. Included in the files
tree<-read.newick("C:~/Tree2020_Aug.phy")
#check that tree
plot(tree)
str(tree)
#load the ages of taxa
ages<-read.csv("C:~/times2020.csv", header=TRUE, row.names=1)

#check those ages
ages

#double-check that names in tree and names in agefile are the same
name.check(tree, ages) #if "OK" prints then good to go, most common error is that rownames not specified in read.csv
name.check(Hip.log, tree)
name.check(tree, ages)

#assign branch lengths to tree
tree<-compute.brlen(tree, 1)

#set branch lengths to age estimates, not allowing zero-length branches, root length must be >0
tree<-DatePhylo(tree, ages, rlen=5, method ="equal")

#report back on the structure of the tree
str(tree)

#right ladderize tree for viewing
reorder(tree)

#examine tree
plotTree(tree)


##Okay, now it's time to do some analysis incorporating body size and phylogenetic corrections
#first let's do a pGLS regression of logfeml on logapp (correcting for phylogenetic covariance)
#Hip.log contains these variables of interest: logapp; logpubl; logiscl; logfeml

#set covariance structure as Brownian Motion (bm)
bm<-corBrownian(1, tree)
bm


glsFL.LOGAPPLENG<-gls(logaleng ~ logfeml, data=Hip.log, correlation = bm)
structure(glsFL.LOGAPPLENG)
glsFL.LOGAPPLENG

gls.LOGDISTP<-gls(logpubl ~ logfeml, data=Hip.log, correlation=bm)
structure(gls.LOGDISTP)
gls.LOGDISTP

glsFL.LOGDISTISC<-gls(logiscl ~ logfeml, data=Hip.log, correlation=bm)
structure(glsFL.LOGDISTISC)
glsFL.LOGDISTISC


#now let's get the residuals

#APP length residuals
glsFL.LOGAPPLENG.res<-resid(glsFL.LOGAPPLENG)
glsFL.LOGAPPLENG.res
summary(glsFL.LOGAPPLENG.res)
#Distal Pubis residuals
glsFL.LOGDISTP.res<-resid(gls.LOGDISTP)
glsFL.LOGDISTP.res
#Distal ischium residuals
glsFL.LOGDISTISC.res<-resid(glsFL.LOGDISTISC)
glsFL.LOGDISTISC.res


##check to see if they're normally distributed
#first visual inspection

hist(glsFL.LOGAPPLENG.res, main="Log APP length residuals")

hist(glsFL.LOGDISTP.res, main="Log pubic length residuals")

hist(glsFL.LOGDISTISC.res, main="Log ischial length residuals")


ggqqplot(glsFL.LOGAPPLENG.res, main="Log APP length residuals")

ggqqplot(glsFL.LOGDISTP.res, main="Log pubic length residuals")

ggqqplot(glsFL.LOGDISTISC.res, main="Log ischial length residuals")


#then parametric test (the problem is the low sample size, here interpret with caution)

shapiro.test(glsFL.LOGAPPLENG.res)
shapiro.test(glsFL.LOGDISTP.res) 
shapiro.test(glsFL.LOGDISTISC.res)

##plot residuals vs LOG_FL to see if they're correlated with body size


plot(Hip.log$logfeml, glsFL.LOGAPPLENG.res, main="Femoral length by Log APP length residuals")

plot(Hip.log$logfeml, glsFL.LOGDISTP.res, main="Femoral length by Log pubic length residuals")

plot(Hip.log$logfeml, glsFL.LOGDISTISC.res, main="Femoral length by Log ischial length residuals")

#so now you know that the residuals of the phylogenetically corrected logFL vs logSQRTA are normally distributed and uncorrelated with LOGFL, 

#This means that residuals are a good measure of phylogenetically and body-size independent changes in APP length 

##Now if we just wished to map some of these things as continuous characters on the tree we can easily do so: 

#Here is a heat map of the residuals of the length of the APP after accounting for body size and phylogeny
cont.map.applres<-contMap(tree, glsFL.LOGAPPLENG.res[tree$tip.label], lwd=6)
axis(1)
title(main="APP length residuals", xlab="Time from root")

#Heat map of Pubis residuals
cont.map.pubisres<-contMap(tree, glsFL.LOGDISTP.res[tree$tip.label],lwd=6)
axis(1)
title(main="Pubic residuals", xlab="Time from root")

#Heat map of Ischium residuals
cont.map.ischres<-contMap(tree, glsFL.LOGDISTISC.res[tree$tip.label],lwd=6)
axis(1)
title(main="Ischial residuals", xlab="Time from root")

#make an object x, where x are the resiuals of the APP length
x<-as.data.frame(glsFL.LOGAPPLENG.res, row.names=rownames(glsFL.LOGAPPLENG.res))
x

#make an object Y, where Y are the residuals of the Distal Pubes
y<-as.data.frame(glsFL.LOGDISTP.res, row.names=rownames(glsFL.LOGDISTP.res))
y

#make an object z, where z are the residuals of the Ischial length
z<-as.data.frame(glsFL.LOGDISTISC.res, row.names=rownames(glsFL.LOGDISTISC.res))
z


#we need to make vectors to map them effectively
XX <- as.numeric(x[ , 1 ])
names( XX ) <- rownames( x ) 
XX
YY <- as.numeric( y[ ,1])
names( YY ) <- rownames(y)
obj<-fastAnc(tree, YY)
YY
ZZ<-as.numeric(z[,1])
names(ZZ)<-rownames(z)
obj<-fastAnc(tree, ZZ)
ZZ


#Then we can plot a phenogram showing the distribution of a given value over the tree and time. 
#This is very helpful for investigating how values change at a glance
help(phenogram)

#APP length:
phenogram(tree, XX[ tree$tip.label ], spread.labels=TRUE, fsize=0.85)
title(main="Phenogram APP length")

#Distal Pubes:
phenogram(tree, YY[ tree$tip.label ], spread.labels=TRUE, fsize=0.85)
title(main="Phenogram Pubis")

#Distal Ischium:
phenogram(tree, ZZ[ tree$tip.label ], spread.labels=TRUE, fsize=0.85)
title(main="Phenogram Ischium")

###one of the last things we might want to do is to fit models of continuous character evolution for what you think are important functional variables, 
#such as PC2 and residuals for the APP. 
#This is done with the geiger package.

##APP residuals & evo models:

#derive some estimate of the standard error for the model fitting functions
#APP length models
sdresX<-sd(XX)
sdresX
seresX<-(sdresX/sqrt(24))
seresX

BM.resX<-fitContinuous(tree,XX, SE=seresX, model="BM")
OU.resX<-fitContinuous(tree,XX, SE=seresX, model="OU")
EB.resX<-fitContinuous(tree,XX, SE=seresX, model="EB")
Drift.resX<-fitContinuous(tree,XX, SE=seresX, model="drift")
stasis.resX<-fitContinuous(tree,XX, SE=seresX, model="white")
BM.resX
OU.resX
EB.resX
Drift.resX
stasis.resX

help("fitContinuous")

aic.scoresX<-c(BM.resX$opt$aicc, OU.resX$opt$aicc, EB.resX$opt$aicc, Drift.resX$opt$aicc, stasis.resX$opt$aicc)
names(aic.scoresX)<-c("BM","OU","EB","Drift","stasis")
aic.scoresX
aic.w.APP<-aic.w(aic.scoresX)
aic.w.APP

#Distal Pubis residuals & evo models:
sdresY<-sd(YY)
sdresY
seresY<-sdresY/sqrt(24)
seresY

BM.resY<-fitContinuous(tree,YY, SE=seresY, model="BM")
OU.resY<-fitContinuous(tree,YY, SE=seresY, model="OU")
EB.resY<-fitContinuous(tree,YY, SE=seresY, model="EB")
Drift.resY<-fitContinuous(tree,YY, SE=seresY, model="drift")
stasis.resY<-fitContinuous(tree,YY, SE=seresY, model="white")
BM.resY
OU.resY
EB.resY
Drift.resY
stasis.resY

aic.scoresY<-c(BM.resY$opt$aicc, OU.resY$opt$aicc, EB.resY$opt$aicc, Drift.resY$opt$aicc, stasis.resY$opt$aicc)
names(aic.scoresY)<-c("BM","OU","EB","Drift","stasis")
aic.scoresY
aic.w.publ<-aic.w(aic.scoresY)
aic.w.publ

#Distal Ischium residuals & evo models:
sdresZ<-sd(ZZ)
sdresZ
seresZ<-sdresZ/sqrt(24)
seresZ

BM.resZ<-fitContinuous(tree,ZZ, SE=seresZ, model="BM")
OU.resZ<-fitContinuous(tree,ZZ, SE=seresZ, model="OU")
EB.resZ<-fitContinuous(tree,ZZ, SE=seresZ, model="EB")
Drift.resZ<-fitContinuous(tree,ZZ, SE=seresZ, model="drift")
stasis.resZ<-fitContinuous(tree,ZZ, SE=seresZ, model="white")
BM.resZ
OU.resZ
EB.resZ
Drift.resZ
stasis.resZ

aic.scoresZ<-c(BM.resZ$opt$aicc, OU.resZ$opt$aicc, EB.resZ$opt$aicc, Drift.resZ$opt$aicc, stasis.resZ$opt$aicc)
names(aic.scoresZ)<-c("BM","OU","EB","Drift","stasis")
aic.scoresZ
aic.w.iscl<-aic.w(aic.scoresZ)
aic.w.iscl


#the likelihood ratio test can compare the fit of any two models as a chisquare with the following stat of twice the difference between log-likelihoods of any given models. 
#For example for the BM and trend this would be equal to: lrt=2(-5.912553-(-4.146858))
#so we can test whether the best and next best models are significantly different this way

lrtAleng=2*(27.157151-13.245325)
lrtAleng
lrtpubl=2*(17.606974-14.465541)
lrtpubl
lrtiscl=2*(23.860079-24.318695)
lrtiscl

#we can then use the chisquare to get a p value for this
Pval_lrt_Aleng<-pchisq(lrtAleng, 1, lower.tail = FALSE)
Pval_lrt_publ<-pchisq(lrtpubl, 1, lower.tail=FALSE)
Pval_lrt_iscl<-pchisq(lrtiscl, 1, lower.tail=FALSE)

Pval_lrt_Aleng #highly significant, p=1.328915e-07
Pval_lrt_publ #highly significant, p=0.01219108
Pval_lrt_iscl #nonsignificant, p=1



